/* Close any log left open by an earlier run. capture suppresses the error when no log is open. */
capture log close
clear all
set maxvar 10000
*set matsize 11000
set more off
/* With pause off, the pause command used further down returns immediately. */
pause off
/* From this point every command is terminated by a semicolon rather than a line break. */
# delimit;

/*
Root folder macros. Both are blank in this copy of the file.
crime is the prefix of every data path below and must be set before running.
output is not referenced in the code shown in this do-file.
*/
local output "";
local crime "";

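/* 
Sibling prep 2: rebuilds one record per hun-sib_ssn pair, attaches the linked
events and the yearly days_in totals, and saves prep2\crime_prworasib_remerge.dta
*/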


/*
PO/drug/property breakout variables.
Each breakout reference file is loaded, every variable except the merge key
cjars_id is given a short prefix (bk, nobk or vice), and the result is stored
in a tempfile whose macro name is that same prefix.
*/
local abbr_all        "bk" ;
local abbr_non_income "nobk" ;
local abbr_vice       "vice" ;

foreach type in all non_income vice { ;
	local pfx "`abbr_`type''" ;
	use "`crime'\reference\reference_event_`type'_breakout.dta", clear ;

	/* Snapshot the variable list first so the renames do not disturb the iteration */
	unab breakout_vars : _all ;
	foreach v of local breakout_vars { ;
		rename `v' `pfx'_`v' ;
	} ;

	/* The key keeps its original name so the file can be merged on it later */
	rename `pfx'_cjars_id cjars_id ;

	tempfile `pfx' ;
	save ``pfx'' ;
} ;


/*
Merge in the linked crime records.
The master is reduced to one row per hun-sib_ssn pair with a non-blank sib_ssn,
then expanded 1:m by every linked record (part, cjars_id) found in the prep1 file.
*/
use "`crime'\831\crime_prworasib_formerge_prep.dta", clear ;
drop if sib_ssn == "" ;
duplicates drop hun sib_ssn, force ;

merge 1:m hun sib_ssn using "`crime'\prep1\crime_prworasib_merged.dta", keepusing(part cjars_id) ;

/* indiv_match flags pairs that found at least one linked record. Using-only rows are discarded. */
generate byte indiv_match = (_merge == 3) ;
keep if _merge != 2 ;
drop _merge ;
count ;

tabulate indiv_match part, missing ;
/* No effect while pause is off */
pause ;



/* ****************************** */
/* **** ELIMINATE DUPLICATES **** */
/* ****************************** */

/*
Reduce the 1:m merge result back to one row per hun-sib_ssn pair.
Step 1 keeps only the preferred part within each pair (part 1 if the pair
has any, otherwise part 2 if the pair has any, otherwise whatever is there).
Step 2 keeps a single row among those that remain.
*/
duplicates report hun sib_ssn;
duplicates report hun sib_ssn part;

/* Drop parts: part1 and part2 flag whether the pair has any row from that part */
tab part, m ;
forval i = 1(1)2 {;
	gen byte temp`i' = (part == `i') ;
	egen part`i' = max(temp`i'), by(hun sib_ssn) ;
	tab part`i', m ;
};
drop temp* ;

drop if part1==1 & part!=1 ;
drop if part1==0 & part2==1 & part!=2 ;
tab part, m ;
drop part1 part2 ;

/* Verify only one part by adding in "part" to duplicates and seeing that it doesn't change anything */
duplicates report hun sib_ssn ;
duplicates report hun sib_ssn part ;

/*
Drop all but one row per pair.
duplicates drop keeps the first row in the current order, and that order is
left over from the merge and from the sorts egen performs internally, so it
is not reproducible from run to run. Rows within a pair differ only in part
and cjars_id (the master was unique on hun sib_ssn and the merge brought in
only those two variables), so sorting on them first makes the surviving row
the same every time. The choice among ties is still arbitrary by design.
*/
sort hun sib_ssn part cjars_id ;
duplicates drop hun sib_ssn, force ;

tab indiv_match, m ;



/* ********************************************* */
/* **** MERGE IN EVENTS FROM REFERENCE FILE **** */
/* ********************************************* */

/* Attach the reference events by cjars_id. Reference rows with no counterpart in memory are discarded. */
merge m:1 cjars_id using "`crime'\reference\reference_event.dta" ;
keep if _merge != 2 ;
drop _merge ;

/*
Attach the three prefixed breakout files in the same way.
Each prefix is also the name of the macro holding that tempfile path,
hence the double macro expansion.
*/
foreach pfx in bk nobk vice { ;
	merge m:1 cjars_id using ``pfx'' ;
	keep if _merge != 2 ;
	drop _merge ;
} ;

/*
Merge incarceration spells.
Only the days_in variables are taken from each yearly flat file.
The doubled backslash in the path keeps the backslash from escaping the
macro that follows it.
*/
local spell_files "hybrid_incarceration incarceration parole probation hybrid_jail" ;
foreach src of local spell_files { ;
	merge m:1 cjars_id using "`crime'\reference\\`src'_annual_1997_2019_flat.dta", keepusing(days_in_*) ;
	keep if _merge != 2 ;
	drop _merge ;

	/*
	Prison counts from the hybrid file are renamed to hprison, presumably to
	keep them apart from same-named variables in a later file (TODO confirm).
	capture lets the rename pass silently for any year that is absent.
	*/
	if "`src'" == "hybrid_incarceration" { ;
		foreach yr of numlist 1997/2019 { ;
			capture rename days_in_prison`yr' days_in_hprison`yr' ;
		} ;
	} ;
} ;

/* Records with no match in a spell file get zero days rather than missing */
foreach daysvar of varlist days_in_* { ;
	replace `daysvar' = 0 if `daysvar' == . ;
} ;


/* **************************************************** */
/* **** RESHAPE WIDE TO GET UNIQUE HUN-sib_ssn PAIR **** */
/* **************************************************** */

/*
Reshape long, then wide to get one observation per hun-sib_ssn pair.
  1. Strip the zero padding from suffixes 01 to 09 so that all suffixes are
     plain integers for reshape.
  2. Reshape long to one row per event and drop rows without a date.
  3. Number the events chronologically within the pair and keep the first 20.
  4. Reshape wide again and store the result in the events tempfile.
The data in memory are restored afterwards.
*/
preserve ;
keep hun sib_ssn cjars_id part event* day_of_week* ;
forval i=1(1)9 { ;
	rename event0`i' event`i' ;
	rename event_date0`i' event_date`i' ;
	rename event_state0`i' event_state`i' ;
	rename day_of_week0`i' day_of_week`i' ;
} ;

/* egen group() leaves the id missing when a key is missing, so unlinked records get a placeholder cjars_id */
replace cjars_id = "NA" if cjars_id == "" ;
egen id= group(hun sib_ssn cjars_id) ;
reshape long event event_date event_state day_of_week, i(id) j(num) ; 
drop id num cjars_id ;
drop if event_date == . ;
egen id = group(hun sib_ssn) ;

/*
The ordering variables go inside the bysort parentheses. A separate sort
issued before egen and bysort does not survive them: both re-sort the data
and Stata does not preserve the previous order among ties, so the numbering
(and therefore which events the cap of 20 removes) would not be reliably
chronological. The extra keys after event_date make same-day ties stable.
*/
bysort id (event_date event event_state day_of_week): gen num = _n ;
drop if num > 20 ;
reshape wide event event_date event_state day_of_week part, i(id) j(num) ;

tempfile events ;
save `events' ;
restore ;

/*
Repeat the long-then-wide reshape for each breakout (bk, nobk, vice).
Each pass produces one row per hun-sib_ssn pair holding up to 20 prefixed
events in chronological order, saved in a tempfile named PREFIX_events.
The data in memory are restored after every pass.
*/
foreach type in all non_income vice { ;

	local pfx "`abbr_`type''" ;

	preserve ;
	keep hun sib_ssn cjars_id part `pfx'_event* ;

	/* Strip the zero padding from suffixes 01 to 09 so reshape sees plain integers */
	forval i=1(1)9 { ;
		rename `pfx'_event0`i' `pfx'_event`i' ;
		rename `pfx'_event_date0`i' `pfx'_event_date`i' ;
		rename `pfx'_event_state0`i' `pfx'_event_state`i' ;
	} ;

	/* egen group() leaves the id missing when a key is missing, so unlinked records get a placeholder cjars_id */
	replace cjars_id = "NA" if cjars_id == "" ;
	egen id= group(hun sib_ssn cjars_id) ;
	reshape long `pfx'_event `pfx'_event_date `pfx'_event_state, i(id) j(num) ; 
	drop id num cjars_id ;
	drop if `pfx'_event_date == . ;
	egen id = group(hun sib_ssn) ;

	/*
	The ordering variables go inside the bysort parentheses. A separate sort
	issued before egen and bysort does not survive them: both re-sort the
	data without preserving the previous order among ties, so the numbering
	(and which events the cap of 20 removes) would not be reliably
	chronological. The extra keys after the date make same-day ties stable.
	*/
	bysort id (`pfx'_event_date `pfx'_event `pfx'_event_state): gen num = _n ;
	drop if num > 20 ;
	reshape wide `pfx'_event `pfx'_event_date `pfx'_event_state part, i(id) j(num) ;

	tempfile `pfx'_events ;
	save ``pfx'_events' ;
	restore ;
} ;

/*
Collapse incarceration records to get one obs per hun-sib_ssn pair.
Every days_in variable is summed within the pair. dob, cjars_id and part
pass through the keep but are not carried by the collapse.
*/
tempfile days ;
keep hun sib_ssn dob cjars_id part days_in_* ;
collapse (sum) days_in_*, by(hun sib_ssn) ;
save `days' ;


/* ********************* */
/* **** FINAL MERGE **** */
/* ********************* */

/*
Get one observation per hun-sib_ssn pair for the "match" variable.
Within each pair the row with the lowest part is kept (missing sorts last).
Only part is read from this tempfile by the merge that follows.
*/
use "`crime'\prep1\crime_prworasib_merged.dta", clear ;
drop if sib_ssn == "" ;

bysort hun sib_ssn (part): keep if _n == 1 ;

tempfile match ;
save `match' ;

/*
Merge in "match" variable.
The prep file is reloaded, reduced to one row per hun-sib_ssn pair, and
flagged with indiv_match when the pair appears in the match tempfile.
NOTE(review): unlike the first load of this file near the top of the script,
rows with a blank sib_ssn are not dropped here. Confirm that is intended.
*/
use "`crime'\831\crime_prworasib_formerge_prep.dta", clear ;
duplicates drop hun sib_ssn, force ;

merge 1:1 hun sib_ssn using `match', keepusing(part) ;
generate byte indiv_match = (_merge == 3) ;
keep if _merge != 2 ;
drop _merge ;
count ;

/* Merge in event dates */
merge 1:1 hun sib_ssn using `events' ;
keep if _merge != 2 ;
drop _merge ;

/* Merge in the breakout event files. Each tempfile macro is named PREFIX_events. */
foreach pfx in bk nobk vice { ;
	merge 1:1 hun sib_ssn using ``pfx'_events' ;
	keep if _merge != 2 ;
	drop _merge ;
} ;

/* Merge in the summed days_in variables */
merge 1:1 hun sib_ssn using `days' ;
keep if _merge != 2 ;
drop _merge ;

count ;

/* Save data set */
save "`crime'\prep2\crime_prworasib_remerge.dta", replace ;

capture log close ;
